/* Get raw data and create full dataset */

***** Set directories 
local dir_do 		"~/Dropbox/Retirement gaming/do_dataverse"
local dir_raw 		"~/Dropbox/Retirement gaming/raw"
local dir_clean 	"~/Dropbox/Retirement gaming/clean"

clear all
local dataname "admindata.dta" // name dataset produced by this program

***firm-level raw data****
import delimited "`dir_raw'/cabezales.txt", delimiter("|")
rename v1 aportaci 
rename v4 Mescargo 
rename v7 ciiu 
rename v5 Tipocontr
rename v2 j /* firm id */
rename v3 j2 
replace j="" if j=="DESEMPLEO"
replace j2="" if j2=="DESEMPLEO"
replace j2="" if j2=="MATERNIDAD"
replace j="" if j=="MATERNIDAD"
replace j="" if j=="ENFERMEDAD"
replace j2="" if j2=="ENFERMEDAD"
destring j, replace 
destring j2, replace
rename v6 ndep 
compress
egen ci=mode(ciiu), by (j Mescargo) maxmode
egen ap=mode(aportaci), by (j Mescargo) maxmode
egen ti=mode(Tipocontr), by (j Mescargo) maxmode
egen de=sum(ndep), by (j Mescargo)
collapse (mean) ci ap ti de, by (j Mescargo)
rename ci ciiu
rename ap aportaci
rename ti Tipocontr
rename de ndep
save "`dir_clean'/cabezales.dta", replace

*** worker-level raw data ****
clear
import delimited "`dir_raw'/personas.txt", delimiter("|")
rename v1 i /* person id */
rename v2 sexo
rename v3 nacionalidad
replace sexo=0 if sexo==2
gen Fnac=date(v4, "DMY") 
replace Fnac=. if Fnac<-20000 // impute missing birth date if before 1903 (there are 3,879 individuals with birth date on 01/01/1901)
format Fnac %td
drop v4 
compress
save "`dir_clean'/personas.dta", replace

clear

foreach num in 1 2  {
clear

import delimited "`dir_raw'/lineas_`num'.txt", delimiter("|") varnames(nonames)

rename v1 aportaci 
rename v2 j
rename v3 j2
rename v4 obra 
rename v5 i 
rename v7 Mescargo
rename v8 fechaingr 
rename v9 fechaegr
rename v10 causal
rename v11 computo
rename v12 exone
rename v13 diasTRA 
rename v14 horasTRA 
rename v15 horasSEM 
rename v16 SegS
rename v17 vf 
rename v18 tipREM
rename v20 remC1 
rename v21 remC2 
rename v22 remC3 

gen SD=0 
replace SD=1 if j=="DESEMPLEO"
replace j="" if j=="DESEMPLEO"
replace j2="" if j2=="DESEMPLEO"

gen mat=0 
replace mat=1 if j=="MATERNIDAD"
replace j2="" if j2=="MATERNIDAD"
replace j="" if j=="MATERNIDAD"

gen enf=0 
replace enf=1 if j=="ENFERMEDAD"
replace j="" if j=="ENFERMEDAD"
replace j2="" if j2=="ENFERMEDAD"

destring j, replace
destring j2, replace

gen Fing=.
replace Fing= date(fechaingr, "DMY")
format Fing %td
drop fechaingr
 
gen Fegr=.
replace Fegr= date(fechaegr, "DMY")
format Fegr %td
drop fechaegr

compress

sort i Mescargo j

save "`dir_clean'/lineas_`num'.dta", replace
}
append using "`dir_clean'/lineas_1.dta"
merge m:1 i using "`dir_clean'/personas.dta"
keep if _merge==3
drop _merge
sort i Mescargo j
save "`dir_clean'/trabajadores_1996_2016.dta", replace

use "`dir_clean'/trabajadores_1996_2016.dta", clear
merge m:1 j Mescargo using "`dir_clean'/cabezales.dta"
keep if _merge==3 
drop _merge
compress
save "`dir_clean'/trabajadores_1996_2016cEMP.dta", replace


********** DATES AND PRICE ADJUSTMENTS **********

use "`dir_clean'/trabajadores_1996_2016cEMP.dta", clear

tostring Mescargo, replace
g t=date(Mescargo, "YM")
g year=yofd(t)
g month=month(t)
replace t=mofd(t)
format t %tm
destring Mescargo, replace
sum Mescargo
sum t, f 


run "`dir_do'/ipc.do"
sum ipc
count

save "`dir_clean'/temp.dta", replace

*************************************************

use "`dir_clean'/temp.dta", clear

** Employment status **
g status		=1 if vf==1	 
replace status	=2 if vf==2 | vf==3 | vf==4 | vf==5 | vf==6 | vf==7 | vf==34  
replace status	=3 if vf==12   
replace status	=4 if vf==13 | vf==14 | (vf>=35 & vf<=38) 
replace status	=5 if vf==15 | vf==16 | vf==17 | vf==49  | vf==64  
replace status	=6 if vf==23 | vf==24 | vf==25 | vf==31 | vf==56 
replace status	=7 if vf==20 | vf==47 | vf==53 | vf==57 | vf==61 |  (vf>=65 & vf<=68) | vf==87
replace status	=8 if vf==76 
replace status	=9 if status==. & vf!=.
replace status	=. if vf==.
label define status 1 "Sole owner" 2 "Partner, director or administrator" 3 "Employee" 4 "Temp worker" 5 "Hourly worker" 6 "On leave, UI or disability" 7 "Public sector" 8 "Private Education" 9 "Other"
label values status status
tab status, gen(status_)
label var status_1 "Sole owner"
label var status_2 "Partner, director or adminstrator"
label var status_3 "Employee"
label var status_4 "Temp worker"
label var status_5 "Hourly worker"
label var status_6 "On leave, UI or disability" 
label var status_7 "Public sector"
label var status_8 "Private Education" 
label var status_9 "Other"

* type of remuneration
replace   tipREM=. if tipREM==7
label define tipREM 1 "monthly" 2 "daily" 3 "pieceworker" 4 "commission" 5 "mixed" 6 "unpaid"  
label values tipREM tipREM
tab tipREM, g(trem_)

replace horasSEM=. if horasSEM==99
replace horasTRA=. if horasTRA==99
replace diasTRA=. if  diasTRA==99
foreach X in horasSEM horasTRA diasTRA {
replace `X'=0 if `X'==.
}

* reason for leaving payroll
tab causal, gen(causal_) 
replace causal_3=causal_13 if causal_3==0
drop causal_13
drop   causal_8 causal_9 causal_10 causal_11 causal_12  causal_14 causal_15 causal_16 causal_17 causal_18 causal_19  causal_20
gen causal_otros=1 if causal_1==0 & causal_2==0 & causal_3==0 & causal_4==0 & causal_5==0 & causal_6==0 & causal_7==0 
tab causal if causal_otros==1
replace causal_otros=0 if causal_otros==.
rename causal_7 causal_8
rename causal_6 causal_7
foreach i in 1 2 3 4 5 7 8 {
replace causal_`i'=0 if causal_`i'==.
}
drop causal
summ causal_*
label var causal_1 "Voluntario"
label var causal_2 "Despido"
label var causal_3 "Fallecimiento"  
label var causal_4 "Termino Contrato"
label var causal_5 "Jubilacion"
label var causal_7 "Oficio uso exclusivo BPS"
label var causal_8 "Baja por cambio de titular"
label var causal_otros "Otros motivos"

** EARNINGS, UNEMPLOYMENT INSURANCE, SICKNESS AND MATERNITY LEAVE **
** Generate earnings in thousands of pesos of Dec 2015
foreach X in remC1 remC2 remC3 {
	replace `X'=(`X'/1000)/ipc
}
egen W=sum(remC1), by(i j t)
replace W=. if W==0
sum W

* Mark individual-months with paid leave
bys i t: egen unemployment=max(SD)
bys i t: egen maternity=max(mat)
bys i t: egen sickness=max(enf)
* Amounts of paid leave
cap drop aux
g aux=0
replace aux=remC1 if SD==1
bys i t: egen amt_unemployment=sum(aux)
replace aux=0
replace aux=remC1 if mat==1
bys i t: egen amt_maternity=sum(aux)
replace aux=0
replace aux=remC1 if enf==1
bys i t: egen amt_sickness=sum(aux)
drop aux
sum amt_* if j!=.

*********** KEEP ONE LINE PER i j t ***********

**First, create summary variables I want to keep 
bys i j: egen vf_mode=mode(vf)
bys i j: egen status_mode=mode(status), minmode
foreach X of varlist remC1 remC2 remC3 horasTRA diasTRA horasSEM {
	bys i j t: egen `X'_sum = sum(`X')
}
foreach X of varlist remC1 remC2 remC3 horasTRA diasTRA horasSEM vf tipREM status_* status trem_* {
	bys i j t: egen `X'_max = max(`X')
}
foreach X of varlist aportaci vf tipREM status {
	bys i j t: egen `X'_min = min(`X')
}

**drop duplicates  i j t***
duplicates tag i j t, gen(dup)
drop if aportaci_min==1 & aportaci!=1 & dup!=0
drop dup
duplicates tag i j t, gen(dup)
egen aw=sum(remC1), by(i j t)
egen maxw=max(remC1), by(i j t)
drop if remC1<maxw & remC1!=. & maxw!=. & dup!=0
drop maxw dup
duplicates tag i j t, gen(dup)
generate aleatorio = (1-(-1))*runiform() + (-1)
egen maxa=max(aleatorio), by (i j t)
drop if aleatorio<maxa & dup!=0
drop maxa dup

duplicates r i j t

** EARNINGS INCLUDING PAID LEAVE and UI (added to main job) **
* Main job
g aux1=remC1 if j!=.
bys i t: egen aux2=max(aux1)
g mainjob=remC1==aux2
drop aux1 aux2
* Create Wben 
g ben=amt_unemployment+amt_maternity+amt_sickness
g Wben=W+ben if j!=. & mainjob==1
drop if j==.
sum W Wben

** LABEL EARNINGS AND OTHER PAYMENTS **
label var remC1 "Earnings"
label var remC2 "Aguinaldo"
label var remC3 "Retroactive payments"
label var W		"Earnings"
label var Wben 	"Earnings plus social insurance benefits"
label var trem_1 "Receives salary"
label var trem_2 "Receives hourly pay"
label var trem_3 "Pieceworker"
label var trem_4 "Receives commission"
label var trem_5 "Receives mixed pay"
label var trem_6 "Unpaid work"

save "`dir_clean'/`dataname'", replace


*********************************************************************************
*FULL TIME EQUIVALENT
*********************************************************************************
use "`dir_clean'/`dataname'", clear

sum dias* horas*
replace diasTRA_sum=30 if diasTRA_sum>=30 & diasTRA_sum!=. // cap days in month at 30
replace diasTRA_max=30 if diasTRA_max>=30 & diasTRA_max!=. // cap days in month at 30
replace horasSEM_max=72 if horasSEM_max>72 & horasSEM_max!=. // cap weekly hours at 12x6
replace horasSEM_sum=72 if horasSEM_sum>72 & horasSEM_sum!=. // cap weekly hours at 12x6
foreach var of varlist diasTRA* horasTRA* horasSEM* {
	replace `var'=0 if `var'<0
}
sum dias* horas*

* MONTHLY PAY (report typicall weekly hours)
g hrsmonth			= diasTRA_sum * horasSEM_max/7 	if tipREM_min==1 & diasTRA_sum>0  & horasSEM_sum>0
replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min==1 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum==0 // I assume that salaried workers without hours reported work full time
replace hrsmonth 	= 0 							if tipREM_min==1 & diasTRA_sum==0 & horasSEM_sum==0 & (horasTRA_sum==0 | W==0)
replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==1 & diasTRA_sum==0 & horasSEM_sum>0 & W>0
replace hrsmonth 	= diasTRA_sum * horasTRA_sum  	if tipREM_min==1 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12

* DAILY PAY (about 1/2 report typicall weekly hours)
replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==2 & diasTRA_sum>0  & horasSEM_sum>0
replace hrsmonth	= diasTRA_sum * 8 				if tipREM_min==2 & diasTRA_sum>0  & horasSEM_sum==0 & horasTRA_sum==0 // if hours not reported, assume full time
replace hrsmonth 	= 0 							if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum==0 & (horasTRA_sum==0 | W==0) 
replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume the worked the full month if weekly hrs are reported and days=0
replace hrsmonth 	= diasTRA_sum * horasTRA_sum  	if tipREM_min==2 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
replace hrsmonth 	= horasTRA_sum 					if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum==0 & W>0 & horasTRA_sum>12 

* PIECEWISE PAY (about 1/2 report typicall weekly hours)
replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==3 & diasTRA_sum>0  & horasSEM_sum>0
replace hrsmonth	= diasTRA_sum * 40/7  			if tipREM_min==3 & diasTRA_sum>0  & horasSEM_sum==0 & horasTRA_sum==0 // if hours not reported, assume full time
replace hrsmonth 	= 0 							if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 & W==0
replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume the worked the full month if weekly hrs are reported and days=0
replace hrsmonth 	= horasTRA_sum * diasTRA_sum	if tipREM_min==3 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
replace hrsmonth 	= horasTRA_sum 					if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum==0 & W>0 & horasTRA_sum>12 

* COMMISSION PAY OR MIXED 
replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0  & horasSEM_sum>0
replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum==0 // I assume that salaried workers without hours reported work full time
replace hrsmonth 	= 0 							if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 & W==0
replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume the worked the full month if weekly hrs are reported and days=0
replace hrsmonth 	= diasTRA_sum * horasTRA_sum 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
replace hrsmonth 	= diasTRA_sum * horasTRA_sum/7 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>=35 & horasTRA_sum<=60 
*Existen casos de empleados (vf_1) con remuneracion mixta or comision que no reportan dias ni horas de trabajo pero sí earnings. A estos casos se los ha dejado missing. 

* UNPAID  
replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==6 & diasTRA_sum>0  & horasSEM_sum>0
replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min==6 & diasTRA_sum>0 & horasSEM_sum==0 & horasTRA_sum==0 // I assume that unpaid workers without hours reported work full time
replace hrsmonth 	= 0 							if tipREM_min==6 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 
replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==6 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 	// I assume the worked the full month if weekly hrs are reported and days=0
replace hrsmonth 	= diasTRA_sum * horasTRA_sum 	if tipREM_min==6 & diasTRA_sum>0 & horasSEM_sum==0 & horasTRA_sum>0 & horasTRA_sum<=12

* WORKERS ON LEAVE (set hours worked missing)
replace hrsmonth 	= 0 							if status_6==1 & W==0

* SET HOURS TO ZERO IF MISSING AND EARNINGS==0 (and not unpaid worker)
replace hrsmonth	= 0								if hrsmonth==. & W==0 & tipREM_min!=6 


save "`dir_clean'/`dataname'", replace

clear all
exit
